import os
import urllib.request
from bs4 import BeautifulSoup

def crawl(url, headers):
  """Fetch a web page and dump its image sources and centered paragraphs.

  The page title is printed to stdout. Two UTF-8 text files are
  (re)written on every call, one entry per line:

  * ``./file/b.img.txt`` -- the ``src`` attribute of every ``<img>`` tag.
  * ``./file/b.p.txt``   -- the text of every
    ``<p class="MsoNormal" align="center">`` tag whose ``.string`` is set.

  Args:
    url: Address of the page to download.
    headers: Mapping of HTTP request headers sent with the request.

  Returns:
    None.

  Errors raised by ``urllib.request.urlopen`` or by ``open`` are not
  caught here and propagate to the caller.
  """
  request = urllib.request.Request(url, headers=headers)
  # The context manager closes the HTTP response even if read() fails.
  with urllib.request.urlopen(request) as response:
    contents = response.read()

  soup = BeautifulSoup(contents, "html.parser")

  # soup.title is None for a page without a <title> tag; avoid crashing on
  # the attribute access in that case.
  title = soup.title
  print(title.string if title is not None else None)

  # Create the output directory up front so open() does not fail when the
  # script runs from a fresh working directory.
  os.makedirs('./file', exist_ok=True)

  with open('./file/b.img.txt', 'w', encoding='utf8') as img_out:
    for img in soup.find_all('img'):
      src = img.get('src')
      # Skip <img> tags that carry no src attribute instead of raising.
      if src is None:
        continue
      img_out.write(f"{src}\n")

  with open('./file/b.p.txt', 'w', encoding='utf8') as text_out:
    for p in soup.find_all('p', class_='MsoNormal', align='center'):
      text = p.string
      # .string is None when the tag does not have exactly one child
      # (e.g. nested markup), so such paragraphs are skipped.
      if text is not None:
        text_out.write(f"{text}\n")

if __name__ == '__main__':
  # Page scraped when this file is run as a script.
  url = "http://www.stats.gov.cn/tjsj/zxfb/202302/t20230216_1908096.html"
  # Adjacent string literals are joined at compile time. A backslash
  # continuation *inside* a single literal would embed the next line's
  # leading indentation in the header value, so it is avoided here.
  headers = {
    'User-Agent': (
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    ),
  }

  crawl(url, headers)